
#####################################################################
##
##  Replication Package for Appendix to:
##
##    Kevin Bryan and Jorge Guzman. 2023. "Entrepreneurial Migration". 
##           The Review of Economics and Statistics.
##
##
##
#####################################################################


##  Overview: Results are presented in the order they are introduced in the paper. First
##
##    Script must be run in order.
##
##

#clear environment
rm(list=ls())
#install required packages
list.of.packages <- c("stringr","geosphere", "lfe","xtable",  "readstata13",  "stargazer",  "ggplot2",  
                      "survival", "fixest",  "ggpubr",  "permute",  "tidyr",  "Hmisc","dplyr",
                      "pastecs", "binsreg","testthat","magrittr","ggrepel","remotes",
                      "lmtest","sandwich","utils","estimatr","grid","gridExtra")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

if(!(c("starpolishr") %in% installed.packages()[,"Package"])){
  remotes::install_github("ChandlerLutz/starpolishr")  
}

#setwd("C:/Users/jag2367/Dropbox/Bryan_Guzman_Migration/Final_Submission/Data_Appendix")

#Load utilities (also loads packages)
source("utils/utils.R")
source("utils/star_panel.R")
source("bryan_guzman_functions.R")
source("estimate_city_utility.R")





migration.cbsa <- get_agg_cbsa() 

migration.cbsa <- migration.cbsa %>% 
  filter(!(source.cbsa %in% bad.cbsas) & !(dest.cbsa %in% bad.cbsas))

moves <- get_moves_in_out_from_migration_cbsa(migration.cbsa) %>%
  left_join(get_cbsa_names()) %>%
  filter(!(cbsa %in% data.bad_cbsa)) ##TO DO: remove this bad cbsas piece?


moves <- moves%>% 
  left_join(get_startup_births()) %>%
  left_join(get_cbsa_pop())

moves <- moves %>%
  left_join(get_moves_in_out_from_migration_cbsa(migration.cbsa)) %>%
  mutate(net.moves = log(moves.in/moves.out),
         total.moves = moves.in + moves.out)


#################################
##
##
## Figure A1: Utility by different industries
##
##



library(ggrepel)

plot_chart <- function(util_list1, util_list2 , suffix, title, xlab_, ylab_){
  
  x = paste0("log.city.util",suffix[1])
  y = paste0("log.city.util",suffix[2])
  name = paste0("cbsa.name",suffix[1])
  
  
  chart_table <- util_list1 %>%
    full_join(util_list2, by=c("cbsa"), suffix=suffix) %>%
    filter(!is.na(!!sym(x)) & !is.na(!!sym(y)))
  
  
  chart_table <- chart_table %>%
    filter(!grepl("Baltimore", !!sym(name)))
  
  gg <- ggplot(aes(x=!!sym(x), y=!!sym(y)), data=chart_table) +
    geom_smooth(method="lm") +  
    #geom_text(aes(label=!!sym(name)), size=3, color="grey40") +
    geom_text_repel(aes(label=!!sym(name)), size=3, color="grey40") +
    geom_point(shape=1, size=2) +
    xlab(xlab_) + ylab(ylab_)+
    theme_bw()+ theme(legend.position = "none") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    ggtitle(title) + 
    scale_x_continuous(limits = c(-4.5, -1.3)) + 
    scale_y_continuous(limits = c(-4.5,-1.3))  
  return (gg)
  
}



file_suffixes  = c("corp_clust_high_tech.all", "corp_clust_traded_services.all","corp_healthcare.all","corp_IT.all",
                   "corp.all")




get_city_utils2 <- function(file_suffix, cutoff_joint = 4 ) {
  csv_path = str_c("./data/cbsa_matrixCurrent",file_suffix,".csv")
  print(str_c("Loading csv from path: ", csv_path))
  cbsa_matrix <- bg.fx.load_migration_data_by_cbsa(filepath = csv_path)
  utils <- estimate_city_utility(cbsa_matrix, cutoff_joint = cutoff_joint)
  return (utils)
}

city_util_list = list()
for(suff in file_suffixes){
  utils <- get_city_utils2(file_suffix =  suff)
  city_util_list[[suff]]<- utils
}


g1 <- plot_chart(city_util_list$corp.all, 
                 city_util_list$corp_healthcare.all, 
                 suffix=c("",".healthcare"), 
                 "A. Health Sector",
                 xlab="Estimated Log Utility for all Delaware Corporations",
                 ylab="Estimated Log Utility for Delaware Corporations in Health")


g2 <- plot_chart( 
  city_util_list$corp.all, city_util_list$corp_clust_high_tech.all,  
  suffix=c("",".services"), 
  "B. High Tech Sector",
  xlab="Estimated Log Utility for all Delaware Corporations",
  ylab="Estimated Log Utility for Delaware Corporations in High Tech")


g3 <- plot_chart(
  city_util_list$corp.all,
  city_util_list$corp_IT.all, 
  suffix=c("",".services"), 
  "C. IT Sector",
  xlab="Estimated Log Utility for all Delaware Corporations",
  ylab="Estimated Log Utility for Delaware Corporations in IT")


g4 <- plot_chart(city_util_list$corp.all,
                 city_util_list$corp_clust_traded_services.all, 
                 suffix=c("",".services"), 
                 "D. Services Sector",
                 xlab="Estimated Log Utility for all Delaware Corporations",
                 ylab="Estimated Log Utility for Delaware Corporations in Services")



ggarrange(g1, g2 ,g3 ,g4)

ggsave(filename="../tex/different_types_it_health_high_tech_services.png", device = 'png', dpi = 450, width=15,height=10)#width=1100, height=800 ,units="px")/


######################################
##
##
##  Figure A2: Data Coverage
##

#TODO: Delete these lines
#install.packages("trace")
##
#library(trace)
all_states_flows = bg.fx.load_migration_data_by_state()  %>%
  group_by(sourcestate) %>%
  tally() %>%
  rename("state" = "sourcestate")


usmap::plot_usmap("states", labels = TRUE,label_color = "white", 
                  data= all_states_flows, values="n") +
  scale_colour_grey() +  
  theme(legend.position="none")
  

ggsave("../tex/DataCoverage.png", width=12, height=9, units = "in")



#####################################
##
##
## TAble A1: LLC Cities Ranking
##
##
migration.cbsa.llc <-  get_agg_cbsa(include_llcs=TRUE)
migration.cbsa.llc <- migration.cbsa.llc %>% 
  filter(!(source.cbsa %in% bad.cbsas) & !(dest.cbsa %in% bad.cbsas))

city.utils <- estimate_obs_and_recpi_utils(migration.cbsa)

city.utils.llc <- estimate_obs_and_recpi_utils(migration.cbsa.llc) %>%
  select(cbsa, city.util, log.city.util, city.rank) %>%
  rename(
    "city.util.llc" = "city.util",
    "log.city.util.llc" = "log.city.util",
    "city.rank.llc" = "city.rank"
  )



cities.dataset <- get_moves_in_out_from_migration_cbsa(migration.cbsa) %>%
  left_join(get_moves_in_out_from_migration_cbsa(migration.cbsa.llc), by=c("cbsa"), suffix=c("",".llc")) %>%
  left_join(get_cbsa_names(), by=c("cbsa")) %>%
  left_join(get_startup_births() %>% mutate(cbsa=as.numeric(cbsa)), by=c("cbsa")) %>%
  left_join(get_cbsa_pop(), by=("cbsa")) %>%
  select(-cbsa.name) %>%
  right_join(city.utils, by=("cbsa")) %>%
  left_join(city.utils.llc , by=("cbsa"))



citylist.all <- cities.dataset
names(citylist.all)
citylist.llc <- rerank(citylist.all %>% filter(pop2010>1000000) 
                       ,also_llc = TRUE) %>%
  arrange(city.rank.llc) %>%
  filter(!is.na(city.util.llc)) %>%  ## Remove the cities with NA utility
  select(city.util, cbsa, cbsa.name, moves.in.llc, moves.out.llc, pop2010, city.util, city.rank.llc, city.util.llc) %>%
  mutate(moves.in.llc = round(moves.in.llc),
         moves.out.llc = round(moves.out.llc),
         city.util= as.character(round(log(city.util),3)),
         city.util.llc= as.character(round(log(city.util.llc),3)),
         pop2010 = prettyNum(pop2010,big.mark = ",")
  ) %>%    
  rename(
    "Log Utility"="city.util",
    "Log Utility LLC"="city.util.llc",
    "CBSA" = "cbsa",
    "CBSA Name" = "cbsa.name",
    "LLC Moves In" = "moves.in.llc",
    "LLC Moves Out" = "moves.out.llc",
    "2010 Pop." = "pop2010",
    "LLC Rank" = "city.rank.llc"
  )


citylist.llc2 <- rerank(citylist.all %>% filter(pop2010>1000000) 
                        ,also_llc = TRUE) %>%
  arrange(city.rank.llc) %>%
  filter(!is.na(city.util.llc)) %>%  ## Remove the cities with NA utility
  select(city.rank.llc, city.util, city.util.llc, cbsa, cbsa.name, moves.in.llc, moves.out.llc, pop2010, city.rank) %>%
  mutate(moves.in.llc = round(moves.in.llc),
         moves.out.llc = round(moves.out.llc),
         city.util= as.character(round(log(city.util),3)),
         city.util.llc= as.character(round(log(city.util.llc),3)),
         pop2010 = prettyNum(pop2010,big.mark = ",")
  ) %>%    
  rename(
    "LLC Rank" = "city.rank.llc",
    "Log Utility LLC"="city.util.llc",
    "CBSA" = "cbsa",
    "CBSA Name" = "cbsa.name",
    "LLC Moves In" = "moves.in.llc",
    "LLC Moves Out" = "moves.out.llc",
    "2010 Pop." = "pop2010",
    "Corp. Rank" = "city.rank"
  )

citylist.llc[1:5,]

outtex <- stargazer(citylist.llc, summary=F, rownames = F, 
                    title="Estimated Utility for Large US Cities Based on LLCs (Population over 1 million in 2010).")

outtex <- gsub("ccccccc","rrrlrrr", outtex)
outtex <- gsub("\\begin{tabular}", "\\resizebox{\\textwidth}{!}{\\begin{tabular}", outtex, fixed = T)
outtex <- gsub("\\end{tabular}", "\\end{tabular}}", outtex, fixed = T)

outtex
writeLines(outtex,file("../tex/appendix.city_list.by_llc.tex")) 




#################################
##
##
## TAble A2:Long City List
##
##

#Set the rank to NA if the city util is NA
citylist.all$city.rank.llc[is.na(citylist.all$city.util.llc)] <- NA

citylist.printall <- rerank(citylist.all,also_llc = TRUE)
citylist.printall$city.rank.llc[is.na(citylist.printall$city.util.llc)] <- NA


citylist.printall <- citylist.printall%>%
  arrange(city.rank) %>%
  mutate(moves.in = round(moves.in),
         moves.out = round(moves.out),
         city.util = as.character(round(log(city.util),3)),   
         city.util.llc= as.character(round(log(city.util.llc),3)),
         pop2010 = prettyNum(pop2010,big.mark = ",")
         
  ) %>%    
  select(city.util, city.rank, cbsa, cbsa.name, moves.in, moves.out, pop2010, city.rank.llc, city.util.llc) %>%
  rename(
    "Rank" = "city.rank",
    "Log Utility"="city.util",
    "Log Utility LLC"="city.util.llc",
    "CBSA" = "cbsa",
    "CBSA Name" = "cbsa.name",
    "Moves In" = "moves.in",
    "Moves Out" = "moves.out",
    "2010 Pop." = "pop2010",
    "LLC Rank" = "city.rank.llc"
  )





outtex <- stargazer(citylist.printall, summary=F, rownames = F, 
                    title="Estimated Utility. Full List.")



outtex <- c("\\begin{spacing}{.9} \\begin{scriptsize}",outtex)
outtex<- c(outtex,"\\end{scriptsize}\\end{spacing}")

outtex <- gsub("^\\\\begin\\{table\\}.*", "begin{longtable}{rrrlrrrcc}", outtex)
outtex <- gsub("begin{longtable}", "\\begin{longtable}", fixed=T, outtex)
outtex[5]
outtex <- gsub("^.*tabular.*$", "", outtex)
outtex <- gsub("\\end{table}", "\\end{longtable}", outtex, fixed = T)

writeLines(outtex,file("../tex/appendix.city_list.all.tex")) 




#################################
##
##
## Table A3: Births
##
##


migration.cbsa.llc <-  get_agg_cbsa(include_llcs=TRUE)
migration.cbsa.llc <- migration.cbsa.llc %>% 
  filter(!(source.cbsa %in% bad.cbsas) & !(dest.cbsa %in% bad.cbsas))

city.utils <- estimate_obs_and_recpi_utils(migration.cbsa)

city.utils.llc <- estimate_obs_and_recpi_utils(migration.cbsa.llc) %>%
  select(cbsa, city.util, log.city.util, city.rank) %>%
  rename(
    "city.util.llc" = "city.util",
    "log.city.util.llc" = "log.city.util",
    "city.rank.llc" = "city.rank"
  )



cities.dataset <- get_moves_in_out_from_migration_cbsa(migration.cbsa) %>%
  left_join(get_moves_in_out_from_migration_cbsa(migration.cbsa.llc), by=c("cbsa"), suffix=c("",".llc")) %>%
  left_join(get_cbsa_names(), by=c("cbsa")) %>%
  left_join(get_startup_births() %>% mutate(cbsa=as.numeric(cbsa)), by=c("cbsa")) %>%
  left_join(get_cbsa_pop(), by=("cbsa")) %>%
  select(-cbsa.name) %>%
  right_join(city.utils, by=("cbsa")) %>%
  left_join(city.utils.llc , by=("cbsa"))



citylist.all <- cities.dataset

citylist.births <- citylist.all %>%
  select(cbsa, pop2010, cbsa.name, city.corporations, city.llcs) %>%
  mutate( corps.per.pop = round(city.corporations/(pop2010/1000),4), 
          llcs.per.pop = round(city.llcs/(pop2010/1000),4),
          pop2010 = prettyNum(pop2010,big.mark = ","),
          city.corporations = prettyNum(city.corporations,big.mark = ","),
          city.llcs = prettyNum(city.llcs,big.mark = ",")) %>%
  arrange(desc(corps.per.pop)) %>%
  rename(
    "CBSA" = "cbsa",
    "CBSA Name" = "cbsa.name",
    "2010 Pop." = "pop2010",
    "Corporations" = "city.corporations",
    "LLCs" = "city.llcs",
    "Corps / Pop" = "corps.per.pop",
    "LLCs / Pop" = "llcs.per.pop"
  )



citylist.births$LLCs

outtex <- stargazer(citylist.births, summary=F, rownames = F, 
                    title="Number of new Delaware corporations and LLCs (firm births) by city.")



outtex <- c("\\begin{spacing}{.9} \\begin{scriptsize}",outtex)
outtex<- c(outtex,"\\end{scriptsize}\\end{spacing}")

outtex <- gsub("^\\\\begin\\{table\\}.*", "begin{longtable}{rrrlrrr}", outtex)
outtex <- gsub("begin{longtable}", "\\begin{longtable}", fixed=T, outtex)
outtex[5]
outtex <- gsub("^.*tabular.*$", "", outtex)
outtex <- gsub("\\end{table}", "\\end{longtable}", outtex, fixed = T)
writeLines(outtex,file("../tex/appendix.city_list.births.tex")) 


outtex <- gsub("ccccccc","rrrlrrr", outtex)
outtex <- gsub("\\begin{tabular}", "\\resizebox{\\textwidth}{!}{\\begin{tabular}", outtex, fixed = T)
outtex <- gsub("\\end{tabular}", "\\end{tabular}}", outtex, fixed = T)

writeLines(outtex,file("../tex/appendix.city_list.births.tex")) 




#################################
##
##
## Table A4: Appendix Pre Post 2001
##
##

## Before2001
csv_path = str_c("./data/cbsa_matrixCurrentcorp.ypre2001.all.csv")
cbsa_matrix <- bg.fx.load_migration_data_by_cbsa(filepath = csv_path)
utils.pre2001 <- estimate_city_utility(cbsa_matrix)
startup_births.pre2001 <- get_startup_births(years = c(1988:2001))

#after 2001
csv_path = str_c("./data/cbsa_matrixCurrentcorp.ypost2001.all.csv")
cbsa_matrix <- bg.fx.load_migration_data_by_cbsa(filepath = csv_path)
utils.post2001 <- estimate_city_utility(cbsa_matrix)
startup_births.post2001 <- get_startup_births(years = c(2002:2020))

cbsa_pop <- get_cbsa_pop()
cbsa_names <- get_cbsa_names()

citylist.raw.pre2001 <- utils.pre2001 %>%
  inner_join(startup_births.pre2001, by=c("cbsa")) %>%
  select(log.city.util, cbsa, city.rank)

citylist.raw.post2001 <- utils.post2001 %>%
  inner_join(startup_births.post2001, by=c("cbsa")) %>% 
  select(log.city.util, cbsa, city.rank)

citylist.raw <- citylist.raw.pre2001 %>%
  full_join(citylist.raw.post2001, by=c("cbsa"), suffix=c(".pre2001",".post2001")) %>%
  inner_join(cbsa_pop, by=c("cbsa"))  %>%
  inner_join(cbsa_names, by=c("cbsa"))



citylist <- citylist.raw %>%
  arrange(city.rank.pre2001) %>%
  mutate(log.city.util.pre2001= round(log.city.util.pre2001,4),
         log.city.util.post2001= round(log.city.util.post2001,4),
         pop2010str = prettyNum(pop2010,big.mark = ",")
  )


citylist$log.city.util.post2001 = as.double(as.character(citylist$log.city.util.post2001))
citylist$log.city.util.pre2001 = as.double(as.character(citylist$log.city.util.pre2001))



citylist <- citylist %>% filter(pop2010>1000000) %>%
  arrange(-log.city.util.post2001) %>%
  mutate(city.rank.post2001 = dplyr::row_number()) %>%
  arrange(-log.city.util.pre2001) %>%
  mutate(city.rank.pre2001 = dplyr::row_number())


summary(lm(city.rank.post2001 ~ city.rank.pre2001 , data=citylist))


citylist.print <- citylist %>% #reorder variables
  select(cbsa, cbsa.name, city.rank.pre2001, log.city.util.pre2001, 
         city.rank.post2001, log.city.util.post2001, pop2010str) %>%
  rename(
    "CBSA" = "cbsa",
    "CBSA Name" = "cbsa.name",
    "\\makecell{Rank \\\\ 1988-2001}" = "city.rank.pre2001",
    "\\makecell{Log Utility \\\\ 1988-2001}"="log.city.util.pre2001",
    "\\makecell{Rank \\\\ 2002-2015}" = "city.rank.post2001",
    "\\makecell{Log Utility \\\\ 2002-2015}"="log.city.util.post2001",
    "2010 Pop." = "pop2010str"
  )



dim(citylist)
outtex <- stargazer(citylist.print, summary=F, rownames = F, 
                    title="Estimated Utility Before and After 2001")

outtex <- gsub("ccccccc","rrrlrrrrrr", outtex)
#outtex <- gsub("\\begin{tabular}", "\\resizebox{\\textwidth}{!}{\\begin{tabular}", outtex, fixed = T)

outtex <- gsub("^.*tabular.*$", "", outtex)
outtex <- gsub("^.*begin\\{table\\}.*", "\\\\begin{spacing}{.9} \\\\begin{scriptsize}\\\\begin{longtable}{llrrrrr}", outtex)
outtex <- gsub("end\\{table\\}", "end{longtable}\n\\\\end{scriptsize}\\\\end{spacing}", outtex)
outtex

outtex <- gsub("\\textbackslash ", "\\", outtex, fixed = T)
outtex <- gsub("\\{", "{", outtex, fixed = T)
outtex <- gsub("\\}", "}", outtex, fixed = T)

outtex
writeLines(outtex,file("../tex/appendix.city_list_pre_post_2001.tex")) 

####################################################
##
##
##  Firms Across Age Summary Stats
##
##

print("Firms Across Age: Summary Statistics")



all.corps <- bg.fx.load_migration_data_by_firm()
all.llc <- bg.fx.load_migration_data_by_firm(corp=F)



names(all.corps)

all.firms <- rbind(all.corps, all.llc)



all.firms.x <- all.firms %>%
  mutate(years_to_move = replace_na(floor(time_to_move/365),-2)+1,
         getsvc = firstvc != .,
         count = 1,
         patent_assignment = replace_na(patent_assignment, 0),
         patent_application = replace_na(patent_application, 0),
         trademark = replace_na(trademark, 0),
         patent_assignmentby6 = replace_na(patent_assignmentby6, 0),
         patent_applicationby6 = replace_na(patent_applicationby6, 0),
         trademarkby6 = replace_na(trademarkby6, 0),
         destpatent_assignmentby6 = coalesce(destpatent_assignmentby6,patent_assignmentby6) ,
         destpatent_applicationby6 = coalesce(destpatent_applicationby6,patent_applicationby6),
         desttrademarkby6 =coalesce(desttrademarkby6,trademarkby6)
  ) %>%
  filter(years_to_move <= 5) %>%
  mutate(mover_1_5 = years_to_move >= 1,
        mover_3_5 = years_to_move >= 3)




all.firms.group <- all.firms.x %>%
  group_by(years_to_move) %>%
  summarise(num = sum(count),
            is_corp = mean(is_corp),
            patent_application = mean(patent_application),
            patent_assignment = mean(patent_assignment),
            trademark = mean(trademark),
            is_HighTech = mean(is_HighTech),
            shortname = mean(shortname),
            eponymous = mean(eponymous),
            destpatent_applicationby6 = mean(destpatent_applicationby6),
            destpatent_assignmentby6 = mean(destpatent_assignmentby6),
            desttrademarkby6 = mean(desttrademarkby6),
            acq = mean(acq),
            ipo = mean(ipo)) %>%
  mutate(years_to_move = as.character(years_to_move)) %>%
  mutate(years_to_move = if_else(years_to_move == "-1", "Did not move",years_to_move)) %>%
  rename(
    "\\textit{\\makecell{Year of \\\\Migration}}"=years_to_move,
    "\\textit{Count}" = num,
    "\\textit{Corporation}" = is_corp,
    "\\textit{\\makecell{Patent\\\\Application\\\\at Founding}}" = patent_application,
    "\\textit{\\makecell{Patent\\\\ Assignment\\\\at Founding}}" = patent_assignment,
    "\\textit{\\makecell{Trademark\\\\at Founding}}" = trademark,
    "\\textit{High Tech}" = is_HighTech,
    "\\textit{Short Name}" = shortname,
    "\\textit{Eponymous}" = eponymous,
    
    "\\textit{\\makecell{Patent\\\\ Application\\\\in 6 Years}}" = destpatent_applicationby6,
    "\\textit{\\makecell{Patent\\\\ Assignment\\\\in 6 Years}}" = destpatent_assignmentby6,
    "\\textit{\\makecell{Trademark \\\\ in 6 Years}}" = desttrademarkby6,
    "\\textit{Acquired}" = acq,
    "\\textit{IPO}" = ipo
  ) %>%
  mutate(across(where(is.numeric), round, digits=3))




ttest_cols = c("is_corp", "patent_application", "patent_assignment", "trademark",
         "is_HighTech", "shortname", "eponymous", 
         "destpatent_applicationby6", "destpatent_assignmentby6", 
         "desttrademarkby6", "acq", "ipo")

row_ttest_none_vs_1_to_5 <- c("")
row_ttest_1_2_vs_3_to_5 <- c("")

for(col in ttest_cols){
  ttest_formula <- as.formula(paste0(col," ~ mover_1_5"))
  ttest_res <- t.test(ttest_formula, data=all.firms.x)
  ttest_string = paste0((round(ttest_res$statistic,digits=3)), case_when(ttest_res$p.value < .01 ~ "***",
                                                                                ttest_res$p.value < .05 ~ "**",
                                                                                ttest_res$p.value < .1 ~ "*",
                                                                                TRUE ~ ""))
  
  row_ttest_none_vs_1_to_5 <- c(row_ttest_none_vs_1_to_5 , ttest_string)  
  
  ttest_formula <- as.formula(paste0(col," ~ mover_3_5"))
  ttest_res <- t.test(ttest_formula, data=all.firms.x %>% filter(years_to_move > 0 ))
  ttest_string2 = paste0((round(ttest_res$statistic,digits=3)), case_when(ttest_res$p.value < .01 ~ "***",
                                                                                ttest_res$p.value < .05 ~ "**",
                                                                                ttest_res$p.value < .1 ~ "*",
                                                                                TRUE ~ ""))
  
  row_ttest_1_2_vs_3_to_5 <- c(row_ttest_1_2_vs_3_to_5 , ttest_string2)  
  
  print(paste0(col , "   ", ttest_string, "  ", ttest_string2))
  
}

t(row_ttest_1_2_vs_3_to_5)

length(ttest_cols)
x = as.data.frame(rbind(c("\\hline \\\\ \n \\emph{\\textbf{T-Tests}} \\\\ Years 3-5 vs 1-2",row_ttest_1_2_vs_3_to_5),
                        c("Years 1-5 vs Did not move",row_ttest_none_vs_1_to_5)
                        ))
dim(x)
names(x) = names(all.firms.group)
latex_table <- xtable::xtable(rbind(all.firms.group,x),caption="Summary Statistics of Firms Across Mover Age")


print(latex_table, scalebox=.7,
      include.rownames=FALSE, sanitize.text.function = function(x){x}, caption.placement="top")


print(latex_table, scalebox=.7,
      include.rownames=FALSE, file="../tex/firms__across_age_summary_stats.tex",sanitize.text.function = function(x){x}, caption.placement="top")
  


################################################
##
##
##  Differences between moves to / from startup hubs.
##
##
##

all.firms.x <- all.firms.x %>%
    mutate(mover_0_2 = years_to_move >= 0 &years_to_move <=2,
         mover_3_5 = years_to_move >= 3 &years_to_move <=5)


names(all.firms.x)[grep("move",names(all.firms.x))]

all.firms.x$startup_hub  <- grepl("San Fran|San Jose|Boston|Austin|New York",all.firms.x$area)
hub_cbsas <- unique(all.firms.x$cbsa[all.firms.x$startup_hub])

all.firms.x <- all.firms.x %>%
  mutate(
    category = case_when(
      (mover_0_2 == TRUE & cbsa %in% hub_cbsas) ~ "Moved to Hub: 0-2",
      (mover_0_2 == TRUE & !(cbsa %in% hub_cbsas)) ~ "Moved to Non Hub: 0-2",
      (mover_3_5 == TRUE & cbsa %in% hub_cbsas) ~ "Moved to Hub: 3-5",
      (mover_3_5 == TRUE & !(cbsa %in% hub_cbsas)) ~ "Moved to Non Hub: 3-5",
      (move5 == FALSE & cbsa %in% hub_cbsas) ~ "Born in Startup Hub",
      (move5 == FALSE & !(cbsa %in% hub_cbsas)) ~ "Born outside Startup Hub",
      TRUE ~"other"
    )
  ) 

all.firms.stats <- all.firms.x %>%
  group_by(category) %>%
  summarise(num = sum(count),
            is_corp = mean(is_corp),
            patent_application = mean(patent_application),
            patent_assignment = mean(patent_assignment),
            trademark = mean(trademark),
            is_HighTech = mean(is_HighTech),
            shortname = mean(shortname),
            eponymous = mean(eponymous)) %>%
  filter(category != "other") %>%
  rename(
    "\\textit{Count}" = num,
    "\\textit{Corporation}" = is_corp,
    "\\textit{\\makecell{Patent\\\\Application\\\\at Founding}}" = patent_application,
    "\\textit{\\makecell{Patent\\\\ Assignment\\\\at Founding}}" = patent_assignment,
    "\\textit{\\makecell{Trademark\\\\at Founding}}" = trademark,
    "\\textit{High Tech}" = is_HighTech,
    "\\textit{Short Name}" = shortname,
    "\\textit{Eponymous}" = eponymous
  ) %>%
  mutate(across(where(is.numeric), round, digits=3))

latex_data <- t(all.firms.stats)


latex_table <- xtable::xtable(all.firms.stats,caption="Summary Statistics of Firms Hubs vs Non Hubs")


print(latex_table, scalebox=.7,
      include.rownames=TRUE, sanitize.text.function = function(x){x}, caption.placement="top")


print(latex_table, scalebox=.7,
      include.rownames=FALSE, file="../tex/hubs_vs_non_hubs.tex",sanitize.text.function = function(x){x}, caption.placement="top")


######################################
##
##
##  Coporate Taxes
##
##
##



settings.corp_or_llc = "corp"

## Estimate migration CBSA values based on different areas.
year_range = "all"
get_city_utils <- function(year_range, cutoff_joint = 4, corp_or_llc = "corp") {
  csv_path = str_c("./data/cbsa_matrixCurrent",corp_or_llc,".",year_range,".csv")
  print(str_c("Loading csv from path: ", csv_path))
  cbsa_matrix <- bg.fx.load_migration_data_by_cbsa(filepath = csv_path)
  in_out <- get_moves_in_out_from_migration_cbsa(cbsa_matrix)
  utils <- estimate_city_utility(cbsa_matrix, cutoff_joint = cutoff_joint) %>%
    left_join(in_out)
  return (utils)
}

utils.0_5 <- get_city_utils("all", corp_or_llc =  settings.corp_or_llc) 
utils.0_1 <- get_city_utils("0-1", corp_or_llc =  settings.corp_or_llc) %>% select(cbsa,city.rank, log.city.util)
utils.0_2 <- get_city_utils("0-2", corp_or_llc =  settings.corp_or_llc) %>% select(cbsa,city.rank, log.city.util)
utils.2_5 <- get_city_utils("2-5", corp_or_llc =  settings.corp_or_llc) %>% select(cbsa,city.rank, log.city.util)



utils_reg <- utils.0_2 %>%
  inner_join(utils.2_5, by=c("cbsa"), suffix=c(".0_2",".2_5")) 

dim(utils_reg)

utils_all <- utils.0_5 %>%
  left_join(get_cbsa_names()) %>%
  left_join(get_cbsa_pop()) %>%
  left_join(get_startup_births())%>%
  mutate(births_per_pop = city.DEobs /pop2010 ,
         log_births_per_pop = log(births_per_pop),
         log_pop = log(pop2010)) %>%
  filter(!grepl("Towson, MD",cbsa.name))

dim(utils_all)

utils_all <- utils_all %>% inner_join(utils.0_2, by=c("cbsa"), suffix=c(".0_5",".0_2")) 

utils_all <- utils_all %>% inner_join(utils.2_5, by=c("cbsa"))
utils_all <- utils_all %>% inner_join(utils.0_1, by=c("cbsa"), suffix=c(".2_5",".0_1"))

names(utils_all)


utils_reg <- utils_reg %>%
  left_join(get_cbsa_names()) %>%
  left_join(get_cbsa_pop()) %>%
  left_join(get_startup_births())%>%
  mutate(births_per_pop = city.DEobs /pop2010 ,
         log_births_per_pop = log(births_per_pop),
         log_pop = log(pop2010)) %>%
  filter(!grepl("Towson, MD",cbsa.name))


new_hhi <- readstata13::read.dta13("./data/hhi.dta") %>%
  rename("cbsa"="msa")
names(new_hhi)
data.external<-read.csv("./data/external_nonalbouy_185cbsas.csv")%>%
  filter(year == 2010)



regression_data.all <- utils_reg %>% 
  inner_join(data.external, by="cbsa")  %>%
  arrange(-births_per_pop) %>%
  left_join(new_hhi,by=c("cbsa") )



data.albouy<- read.csv("./data/Albouy_185cbsas.csv") 


regression_data.all <-   regression_data.all %>%
  left_join(data.albouy, by=c("cbsa"))

regression_data <- regression_data.all %>%
  arrange(-pop) %>%
  filter(pop2010 > 500000)

reg_data_a <- regression_data %>% 
  select(births_per_pop , log_births_per_pop,tax.average_tax_rate_50th_perc, pop,cbsa,log.city.util.0_2, 
         cbsa.name,log_hhi2, patents_per_capita, tax.average_tax_rate_95th_perc, static_albouy.quality_of_life) %>%
  mutate(early = 1, later =0) %>%
  rename( "log.city.util" = "log.city.util.0_2")

reg_data_b <- regression_data %>% 
  select(births_per_pop , log_births_per_pop,tax.average_tax_rate_50th_perc, pop,cbsa,log.city.util.2_5, cbsa.name, 
         log_hhi2, patents_per_capita,tax.average_tax_rate_95th_perc, static_albouy.quality_of_life) %>%
  mutate(early = 0,later=1) %>%
  rename( "log.city.util" = "log.city.util.2_5")

reg_data_2 <- rbind(reg_data_a, reg_data_b) %>%
  mutate(log_patents_per_pop = log(patents_per_capita))


reg_data_2 <-reg_data_2[!grepl("Albany",reg_data_2$cbsa.name),]


corporate_taxes <- readstata13::read.dta13("./data/corporate_tax_rates.dta") %>%
  group_by(state) %>%
  summarise(cit = mean(cit_state))

reg_data_2$cbsa.state = str_match(reg_data_2$cbsa.name, ", (\\w\\w)")[,2]

reg_data_2 <- reg_data_2 %>%
  left_join(corporate_taxes, by=c("cbsa.state"="state"))



regression_weights = reg_data_2$births_per_pop
reg1 = lm(log_births_per_pop~ cit, data=reg_data_2, weights=regression_weights)
reg2 = lm(log.city.util~cit, data=reg_data_2, weights=regression_weights)
reg3 = lm(log.city.util~ cit*later , data=reg_data_2, weights=regression_weights)
reg4 = lm(log.city.util~ cit + tax.average_tax_rate_95th_perc , data=reg_data_2, weights=regression_weights)
reg5 = lm(log.city.util~ cit*later + tax.average_tax_rate_95th_perc  , data=reg_data_2, weights=regression_weights)


stargazer(reg1, reg2 ,reg3, reg4, reg5, 
          out = "../tex/appendix.corporate_taxes.tex",
          se=starprep(reg1, reg2, reg3, reg4, reg5),
          star.cutoffs = c(.1, .05, .001),
          column.labels = c("City Entrepreneurship","City Utility","City Utility","City Utility","City Utility"),
          dep.var.labels  = c("\\emph{Baseline}", "\\emph{Corporate Taxes}", "\\emph{Corporate Taxes}"),
          covariate.labels =  c("Corporate Income Taxes",
                                "Corporate Income Taxes $\\times$  Later Movers (Years 3-5)",
                                "Personal Income Tax at 95th Percentile"),
          keep =c("cit","tax"),
          omit.stat=c("adj.rsq", "ser","f"),
          omit.table.layout = "n",
          omit=c("Constant"),
          float=FALSE
)


#######################################################################
##
##
##
##  LLC Table
##
##


settings.corp_or_llc = "llc"

## Estimate migration CBSA values based on different areas.
year_range = "all"
get_city_utils <- function(year_range, cutoff_joint = 4, corp_or_llc = "corp") {
  csv_path = str_c("./data/cbsa_matrixCurrent",corp_or_llc,".",year_range,".csv")
  print(str_c("Loading csv from path: ", csv_path))
  cbsa_matrix <- bg.fx.load_migration_data_by_cbsa(filepath = csv_path)
  in_out <- get_moves_in_out_from_migration_cbsa(cbsa_matrix)
  utils <- estimate_city_utility(cbsa_matrix, cutoff_joint = cutoff_joint) %>%
    left_join(in_out)
  return (utils)
}

utils.0_5 <- get_city_utils("all", corp_or_llc =  settings.corp_or_llc) 
utils.0_2 <- get_city_utils("0-2", corp_or_llc =  settings.corp_or_llc) %>% select(cbsa,city.rank, log.city.util)
utils.2_5 <- get_city_utils("2-5", corp_or_llc =  settings.corp_or_llc) %>% select(cbsa,city.rank, log.city.util)



utils_reg <- utils.0_2 %>%
  inner_join(utils.2_5, by=c("cbsa"), suffix=c(".0_2",".2_5")) 


utils_reg <- utils_reg %>%
  left_join(get_cbsa_names()) %>%
  left_join(get_cbsa_pop()) %>%
  left_join(get_startup_births())%>%
  mutate(births_per_pop = city.DEobs /pop2010 ,
         log_births_per_pop = log(births_per_pop),
         log_pop = log(pop2010)) %>%
  filter(!grepl("Towson, MD",cbsa.name))


new_hhi <- readstata13::read.dta13("./data/hhi.dta") %>%
  rename("cbsa"="msa")
names(new_hhi)
data.external<-read.csv("./data/external_nonalbouy_185cbsas.csv") %>%
      filter(year == 2000) 



regression_data.all <- utils_reg %>% 
  inner_join(data.external, by="cbsa") %>%
  arrange(-births_per_pop) %>%
  left_join(new_hhi,by=c("cbsa") )



data.albouy<- read.csv("./data/Albouy_185cbsas.csv") 


regression_data.all <-   regression_data.all %>%
  left_join(data.albouy, by=c("cbsa"))

regression_data <- regression_data.all %>%
  arrange(-pop) %>%
  #filter(dplyr::row_number()<=50)
  filter(pop2010 > 500000)


reg_data_a <- regression_data %>% 
  select(births_per_pop , log_births_per_pop,tax.average_tax_rate_50th_perc, pop,cbsa,log.city.util.0_2, 
         cbsa.name,log_hhi2, patents_per_capita, tax.average_tax_rate_95th_perc, static_albouy.quality_of_life) %>%
  mutate(early = 1, later =0) %>%
  rename( "log.city.util" = "log.city.util.0_2")

reg_data_b <- regression_data %>% 
  select(births_per_pop , log_births_per_pop,tax.average_tax_rate_50th_perc, pop,cbsa,log.city.util.2_5, cbsa.name, 
         log_hhi2, patents_per_capita,tax.average_tax_rate_95th_perc, static_albouy.quality_of_life) %>%
  mutate(early = 0,later=1) %>%
  rename( "log.city.util" = "log.city.util.2_5")

reg_data_2 <- rbind(reg_data_a, reg_data_b) %>%
  mutate(log_patents_per_pop = log(patents_per_capita))


reg_data_2 <-reg_data_2[!grepl("Albany",reg_data_2$cbsa.name),]


regression_weights = reg_data_2$births_per_pop

reg1 = lm(log.city.util~log_births_per_pop*later, data=reg_data_2, weights=regression_weights)
reg2 = lm( log_births_per_pop~ log_hhi2  + log_patents_per_pop, data=reg_data_2, weights=regression_weights)
reg3 = lm( log.city.util~ log_hhi2 + later+  + log_hhi2:later , data=reg_data_2, weights=regression_weights)
reg4 = lm( log.city.util~  + later+ log_patents_per_pop +  log_patents_per_pop:later, data=reg_data_2, weights=regression_weights)
reg5 = lm(log_births_per_pop~tax.average_tax_rate_95th_perc   , data=reg_data_2, weights=regression_weights)
reg6 = lm(log_births_per_pop~ tax.average_tax_rate_50th_perc , data=reg_data_2, weights=regression_weights)

reg7 = lm(  log.city.util~ tax.average_tax_rate_95th_perc + later +tax.average_tax_rate_95th_perc:later, data=reg_data_2, weights=regression_weights)
reg8 = lm(  log.city.util~ tax.average_tax_rate_50th_perc + later +tax.average_tax_rate_50th_perc:later, data=reg_data_2, weights=regression_weights)




regression_covariate_labels = c("Growth Startups per Capita",
                                "Growth Startups per Capita $\\times$ Later Movers (Years 3-5)",
                                "Industry Concentration (HHI)",
                                "Industry Concentration (HHI) $\\times$ Later Movers (Years 3-5)",
                                "Patenting per Capita",
                                "Patenting per Capita $\\times$ Later Movers (Years 3-5)",
                                "Personal Income Tax (95th)",
                                "Personal Income Tax (95th) $\\times$ Later Movers (Years 3-5)",
                                "Personal Income Tax (50th)",
                                "Personal Income Tax (50th) $\\times$ Later Movers (Years 3-5)",
                                "Quality of Life Index",
                                "Quality of Life Index $\\times$ Later Movers (Years 3-5)"
                                
)

regression_variable_order = c("log_births_per_pop$","log_births_per_pop:later","^log_hhi2$","log_hhi.*","log_patents_per_pop$","log_patents.*",
                              "^tax.average_tax_rate_95th_perc$","tax.average_tax_rate_95th_perc.*",
                              "^tax.average_tax_rate_50th_perc$","tax.average_tax_rate_50th_perc.*",
                              "^static_albouy.quality_of_life$","static_albouy.quality_of_life.*")

out_tex = stargazer(reg1, reg2, reg3, reg4, reg5,reg6,reg7,reg8,
          se=starprep(reg1, reg2, reg3,reg4, reg5,reg6,reg7,reg8),
          star.cutoffs = c(.1, .05, .001), 
             column.labels = c("\\makecell{\\\\ \\emph{Migrant} \\\\ City Utility}",
                            "\\makecell{\\\\ City \\\\  Entrepreneurship}",
                            "\\makecell{\\\\ \\emph{Migrant} \\\\ City Utility}","\\makecell{\\\\ \\emph{Migrant} \\\\ City Utility}",
                            "\\makecell{\\\\ City \\\\ Entrepreneurship}",
                            "\\makecell{\\\\ City \\\\ Entrepreneurship}",                            
                            "\\makecell{\\\\ \\emph{Migrant} \\\\ City Utility}","\\makecell{\\\\ \\emph{Migrant} \\\\ City Utility}"),
        dep.var.labels  = c("\\textbf{Baseline}",
                              "\\multicolumn{3}{|c|}{\\textbf{Nursery Cities}xxx","","}",
                              "\\multicolumn{4}{|c|}{\\textbf{Income Taxes}}xxx","","",
                              ""),
            multicolumn = F,
          covariate.labels   = regression_covariate_labels,
          order = regression_variable_order,
          keep = regression_variable_order,          
#          out=
          omit.stat=c("adj.rsq", "ser","f"),
          omit.table.layout = "n",
          omit=c("Constant"),
          float=FALSE)

out_tex= gsub(pattern = "xxx[ &]*",replacement = "",x = out_tex)
out_tex= gsub(pattern = "endhere.*$",replacement = "\\\\",x = out_tex)



writeLines(out_tex, file("../tex/llc_table_city_util.tex"))
           
#######################################################################
##
##
## Migration and Distance
##
##
##



library(geosphere)
library(fixest)
migration.cbsa.orig  <- get_agg_cbsa(include_llcs=FALSE)
migration.cbsa.llc <-  get_agg_cbsa(include_llcs=TRUE)

names(migration.cbsa)
cbsa_names = get_cbsa_names()

cbsa_codes <- read.csv("./data/cbsas_geocoded.csv")

source_cbsa <- cbsa_codes %>%
  select(cbsa, lat, lon) %>%
  rename("source.cbsa" = "cbsa",
         "source.lat" = "lat",
         "source.lon" = "lon")


dest_cbsa <- cbsa_codes %>%
  select(cbsa, lat, lon) %>%
  rename("dest.cbsa" = "cbsa",
         "dest.lat" = "lat",
         "dest.lon" = "lon")

migration.cbsa <- migration.cbsa.orig %>%
  left_join(source_cbsa) %>%
  left_join(dest_cbsa)


migration.cbsa <- migration.cbsa %>% 
  rowwise() %>%
  mutate(dist = geosphere::distm(c(source.lon,source.lat), c(dest.lon,dest.lat), fun=distHaversine))

migration.cbsa <- migration.cbsa %>% 
  mutate(log_dist = log10(dist))



reg1 = fixest::feols(move5 ~ log_dist, data=migration.cbsa)
reg2 = fixest::feols(move5 ~ log_dist|source.cbsa, data=migration.cbsa)
reg3 = fixest::feols(move5 ~ log_dist|source.cbsa + dest.cbsa, data=migration.cbsa)
out_tex = etable( list(reg1, reg2, reg3), vcov = ~source.cbsa + dest.cbsa,
        tex=TRUE, 
        title ="Distance and migration rates. Dep. Var. log(migrants+1).",
        drop="(Intercept)",
        depvar=FALSE,        
        dict = c("log_dist" = "Log10(Distance)",
                 "source.cbsa" = "Source CBSA FE",
                 "dest.cbsa" = "Dest CBSA FE"),
        notes="The impact of distance on the migration counts across locations conditional on region fixed-effects is statistcally
              positive but not economically meaningful. The range of the Log10(Distance) variable is from 4.5 to 7. Going from the
            closest to the furthest pair only increases mgiration rates by 0.03%.")

writeLines(out_tex, file("../tex/migration_and_distance.tex"))

#######################################################################
##
##
## Amenities Regressions
##
##
##


data.moves <- read.csv("./data/city_utilities.csv")
data.albouy<- read.csv("./data/Albouy_185cbsas.csv")
data.external<-read.csv("./data/external_nonalbouy_185cbsas.csv")





## Choose one of the two
use_only_2010 = FALSE
if(use_only_2010) {
  print("Using only 2010 measures")
  data.external <- data.external %>%
    filter(year == 2010)
}else{
  #### (b) use the average
  data.external <- data.external %>% 
    group_by(cbsa) %>% 
    summarise(across(everything(), list(mean), na.rm = TRUE)) 
  names(data.external) = gsub("_1","",names(data.external))
}


moves <- data.moves %>% 
  inner_join(data.albouy, by="cbsa") %>%
  inner_join(data.external, by="cbsa")


new_hhi <- readstata13::read.dta13("./data/hhi.dta") %>%
  rename("cbsa"="msa")%>%
  mutate(log_hhi2 = log(hhi2))

moves <- moves %>%
  left_join(new_hhi,by=c("cbsa") ) 

moves$bohemia = moves$bohemia * 100


moves$homevaluemodified <- moves$average_home_value/100000
moves$static_amenities.heating_days1000 = moves$static_amenities.heating_days/1000
moves$static_amenities.cooling_days1000 = moves$static_amenities.cooling_days/1000
reg1.a <- lm(log.city.util ~ static_amenities.cooling_days1000 + static_amenities.heating_days1000 + static_amenities.sunshine_pct 
             +static_amenities.inv_dist_from_water + static_amenities.latitude, 
             data =moves)

reg2.a <-  lm(log.city.util ~ homevaluemodified, data=moves)

reg3.a <-  lm(log.city.util ~ static_albouy.quality_of_life , data =moves )

reg4.a <- lm(log.city.util ~  bohemia, data =moves)

reg5.a <- lm(log.city.util.llc ~ static_amenities.cooling_days1000 + static_amenities.heating_days1000 + static_amenities.sunshine_pct 
             +static_amenities.inv_dist_from_water + static_amenities.latitude, 
             data =moves)


reg6.a <-  lm(log.city.util.llc~ homevaluemodified, data =moves )

reg7.a <-  lm(log.city.util.llc ~static_albouy.quality_of_life , data =moves )

reg8.a <-  lm(log.city.util.llc ~bohemia , data =moves )


#stargazer(reg8.a)

#stargazer( reg1.a, reg2.a, reg3.a, reg4.a, reg5.a,reg6.a, reg7.a, reg8.a, type="text",  report=('vc*p'))

#summary(reg1.a)
#?modelsummary

library(modelsummary)
options(modelsummary_format_numeric_latex = "plain")
outtex = modelsummary(list(reg1.a, reg2.a, reg3.a, reg4.a, reg5.a,reg6.a, reg7.a, reg8.a),  vcov = "robust",
             coef_rename = c(
               "static_amenities.cooling_days1000" = "Cooling Degree Days /1000",
               "static_amenities.heating_days1000" = "Heating Degree Days /1000",
               "static_amenities.sunshine_pct" = "Sunshine Percentage",
               "static_amenities.inv_dist_from_water" = "Inverse Dist. from Water",
               "static_amenities.latitude" = "Latitude",
               "homevaluemodified" = "Average Home Value",
               "static_albouy.quality_of_life" = "Quality of Life Index",
               "bohemia" = "Bohemia"
               
             ), coef_omit = c(1),
             fmt=4,
             stars = c("*" = .1, "**" = .05, "***" = .01), output="latex",
             title = "Amenities: Do Local Amenties Correlate to Estimated City Utility?",
             gof_omit = "IC|R2|FE|stat1|stat2|RMSE|Std") %>%
  kableExtra::add_header_above(c(" " = 1, "Corporations" = 4, "LLCs" = 4))


footnote= "OLS regression with city utility as the dependent variable. City utility is our estimated measure from the underlying graph of moves across cities in the United States. Columns (1)- (4) use the utility estimated through the moves of corporations registered under Delaware jurisdiction (but domiciled anywhere in the U.S.). Columns (5)-(8) use the utility estimated through the moves of LLCs registered under Delaware jurisdiction. We are able to estimate underlying utility for 183 cities using corporations, and 123 cities using LLCs. Cooling and Heating Degree Days are the cumulative gaps in days times temperature throughout a year where cooling or heating is needed as the temperature deviates from 65F. Inv. Distance to Water is the log if 1 over the distance (in miles) to the coast or to a Great Lake. Sunshine Percentage is the share of sunny days in a region. Inverse Dist. to Water is the log of the inverse distance to either the ocean or the Great Lakes.  Average Home Value is denoted in units of 100,000 dollars. Quality of Life Index uses Albouy (2007) updated estimates of summmary quality of life taking into account these amenities.  Bohemia is estimated  is estimated by replicating the methodology in Florida (2001): the location quotient of the incidence of professionals in  bohemian occupations.  These occupations are: authors; designers; musicians and composers; actors and directors; craft-artists, painters, sculptors, and artist printmakers; photographers; dancers; and artists, performers, and related workers.  Robust standard errors in parenthesis. Significance denoted as $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01"
writeLines(outtex, file("../tex/amenities_regressions.tex"))





#########################################
##
##
## Panel Summary Stats
##
##

sumstats_cols <- data.frame(matrix(ncol=2,nrow=7))
names(sumstats_cols) <- c("variables","labels")
sumstats_cols$variables <- c("pop",
                             "log_hhi2",
                             "patents_per_capita",
                             "tax.average_tax_rate_50th_perc",
                             "tax.average_tax_rate_95th_perc",
                             "city.corporations",
                             "city.llcs")



sumstats_cols$labels <- c("Population",
                          "Log(HHI)",
                          "Patents per Thousand Pop",
                          "Income Tax at 50th Perc.",
                          "Income Tax at 95th Perc.",
                          "Delaware Corporations",
                          "Delaware LLCs")

sumstats.external <- moves %>%
  select(sumstats_cols$variables) %>%
  relocate(sumstats_cols$variables) 



stargazer(sumstats.external, type="text",
          summary.stat=c("mean","sd", "N"),
          digits=3,
          covariate.labels=sumstats_cols$labels)

outtex = stargazer(sumstats.external, type="latex", 
                   summary.stat=c("mean","sd", "N"),
                   covariate.labels=sumstats_cols$labels,
                   title="Summary Statistics for Metropolitan Areas")

library(starpolishr)
# Insert all the subtitles
outtex = star_insert_row(outtex, c("\\textbf{Industry Measures} \\\\"), insert.after=grep("Industry",outtex)-1)
#outtex = star_insert_row(outtex, c("\\textbf{Education and Idea Generation} \\\\"), insert.after=grep("Share College Educated",outtex)-1)
outtex = star_insert_row(outtex, c("\\textbf{Income Tax} \\\\"), insert.after=grep("Income Tax at 50th",outtex)-1)
#outtex = star_insert_row(outtex, c("\\textbf{Venture Capital} \\\\"), insert.after=grep("Total VC Invested",outtex)-1)
#outtex = star_insert_row(outtex, c("\\textbf{Amenities} \\\\"), insert.after=grep("Average Home Value",outtex)-1)
#outtex = star_insert_row(outtex, c("\\textbf{Creative Industries} \\\\"), insert.after=grep("Bohemia",outtex)-1)
outtex = star_insert_row(outtex, c("\\textbf{Startup Cartography Project} \\\\"), insert.after=grep("Delaware Corporations",outtex)-1)



writeLines(outtex,file("../tex/panel_summary_stats.tex"))





#################################################################
###
###
###   table on quality of destination 
###
###



get_city_utils <- function(year_range) {
  in_out <- get_moves_in_out_from_migration_cbsa(cbsa_matrix)
  utils <- estimate_city_utility(cbsa_matrix) %>%
    left_join(in_out)
  return (utils)
}

migration_data <- bg.fx.load_migration_data_by_firm(only_if_missing=TRUE)


names(migration_data)
cbsa_matrix.N <- bg.fx.load_migration_data_by_cbsa(filepath = str_c("./data/cbsa_matrixCurrentcorp.all.csv"))
cbsa_matrix.q <- bg.fx.load_migration_data_by_cbsa(filepath = str_c("./data/cbsa_matrixCurrentcorp.quality_adjusted.all.csv"))


cbsa_matrix.q <- cbsa_matrix.q %>% select(source.cbsa, dest.cbsa, move5) %>% rename("quality"= "move5")
cbsa_matrix <- cbsa_matrix.N %>% 
  select(source.cbsa, dest.cbsa, move5) %>%
  inner_join(cbsa_matrix.q)  %>%
  mutate(move5recpi = move5* quality)


head(cbsa_matrix)

names(cbsa_matrix)

cbsa_quality.out <- cbsa_matrix %>%
  filter(move5 > 0) %>%
  group_by(source.cbsa) %>%
  summarise(move_out_recpi = sum(move5recpi),
            move_out_total=  sum(move5)) %>%
  mutate(move_out_quality = move_out_recpi / move_out_total) %>%
  rename("cbsa" = source.cbsa)

head(cbsa_quality.out, 40)


cbsa_quality.in <- cbsa_matrix %>%
  filter(move5 > 0) %>%
  group_by(dest.cbsa) %>%
  summarise(move_in_recpi = sum(move5recpi),
            move_in_total=  sum(move5)) %>%
  mutate(move_in_quality = move_in_recpi / move_in_total) %>%
  rename("cbsa" = dest.cbsa)

head(cbsa_quality.in, 40)

startup.births <- get_startup_births()
pop <- get_cbsa_pop() 

cbsa_quality <- cbsa_quality.in %>%
  inner_join(cbsa_quality.out) %>% 
  inner_join(startup.births) %>%
  inner_join(pop) %>%
  mutate(eship_per_pop = city.DEobs/pop2010,
         log_pop = log(pop2010))


data.moves <- read.csv("./data/city_utilities.csv")
cbsa_quality<- cbsa_quality %>%
  filter(cbsa %in% data.moves$cbsa)

cbsa_quality %>% select(move_in_quality, move_out_quality)

reg1 <- lm(log(move_in_quality) ~ log(move_out_quality), data = cbsa_quality)
reg1.se <- sqrt(diag(vcovHC(reg1, type = "HC1")))

reg2 <- lm(log(move_in_quality) ~ log(move_out_quality) + log(eship_per_pop), data = cbsa_quality)
reg2.se <- sqrt(diag(vcovHC(reg2, type = "HC1")))

reg3 <- lm(log(move_in_quality) ~ log(move_out_quality) + log(eship_per_pop), data = cbsa_quality, weights=cbsa_quality$log_pop)
reg3.se <- sqrt(diag(vcovHC(reg3, type = "HC1")))

stargazer(reg1, reg2, reg3, type ="text",  se=list(reg1.se, reg2.se, reg3.se))





stargazer(reg1,reg2,reg3,   type="latex",
          out="../tex/appendix.city_quality_regressions.tex",
          se=list(reg1.se, reg2.se,reg3.se ),
          title="Idea Generation and Migrant Utility",
          omit=c("Constant"),
          keep.stat=c("n","rsq"),
          covariate.labels = c("log(Avg. Out Mover Quality)",
                               "Log(Delaware Startups Per Capita)"),
          dep.var.caption = c("log(Avg. In Mover Quality)"),
          
          float=FALSE,    omit.table.layout = "n")
